import asyncio
import os
from dotenv import load_dotenv
import json
from crawl4ai import JsonCssExtractionStrategy, LLMConfig

# Load environment variables from the .env file so that the os.getenv
# lookups performed later in this script can see them.
load_dotenv()

async def main(html_file: str) -> None:
    """Generate a CSS extraction schema for an HTML file and print it as JSON.

    The file is read as UTF-8 and handed to
    ``JsonCssExtractionStrategy.generate_schema`` together with an
    ``LLMConfig`` built from environment variables. The resulting schema is
    printed to stdout as JSON with a two-space indent.

    Environment variables:
        PROVIDER: Provider identifier passed to ``LLMConfig``. Required.
        DEEPSEEK_API_KEY: API token passed to ``LLMConfig``. It is forwarded
            as-is, including when it is unset.

    Args:
        html_file: Path of the HTML file to read.

    Raises:
        OSError: If ``html_file`` cannot be opened (for example
            ``FileNotFoundError``).
        RuntimeError: If ``PROVIDER`` is unset or empty.
    """
    # Read the HTML file.
    with open(html_file, 'r', encoding='utf-8') as file:
        html_content = file.read()

    # Fail with a clear message instead of handing provider=None to the
    # library, where the resulting error would not point at the real cause.
    provider = os.getenv('PROVIDER')
    if not provider:
        raise RuntimeError(
            "PROVIDER environment variable is not set; "
            "define it in the environment or in the .env file"
        )

    schema = JsonCssExtractionStrategy.generate_schema(
        html_content,
        llm_config=LLMConfig(
            provider=provider,
            api_token=os.getenv('DEEPSEEK_API_KEY'),
        ),
    )

    # Instantiate the strategy with the generated schema; the instance is not
    # used any further in this script.
    strategy = JsonCssExtractionStrategy(schema)

    print(json.dumps(schema, indent=2))

if __name__ == "__main__":
    # Script entry point: build the coroutine for the bundled sample page,
    # then drive it to completion on a fresh event loop.
    entry_coroutine = main('course_content.html')
    asyncio.run(entry_coroutine)